* Load the analysis panel. The path uses forward slashes because Stata accepts
* them on every platform, whereas a backslash separator only resolves on Windows.
use "Output Data/regdata_analysis.dta", clear

* Lowercase every variable name, then discard observations dated before 2000.
* Observations with a missing year are kept (missing is not < 2000 in Stata).
rename *, lower
drop if year < 2000

* Integer identifiers for each combination of the listed variables.
* agency_year, comp_year and comp_agency are the absorbed fixed effects in the
* reghdfe models further down; comp_agency is also the cluster variable.
egen comp_agency = group(companyid agencyid)
egen agency_year = group(agencyid year)
egen comp_year = group(companyid year)
egen agency_ind = group(agencyid naics)
egen agency_ind_year = group(agencyid naics year)

* Declare the panel structure: unit = company-agency pair, time = year.
tsset comp_agency year

* Build log(1 + x) versions of the three restriction counts.
* Source -> target: restrictions -> log_res, l1_restrictions -> L1_log_res,
* f1_restrictions -> F1_log_res (the l1_/f1_ variables are presumably the
* one-year lag and lead -- confirm against the data build).
* Any value that is still missing for year <= 2019 is set to 0.
local raw_vars restrictions l1_restrictions f1_restrictions
local log_vars log_res L1_log_res F1_log_res

forvalues k = 1/3 {
    local raw : word `k' of `raw_vars'
    local out : word `k' of `log_vars'

    generate `out' = ln(`raw' + 1)
    replace `out' = 0 if year <= 2019 & missing(`out')
}

* Combined connection count; missing whenever either component is missing.
generate connect_num_total = connect_num + connect_num_dw

* For each count variable create a 0/1 indicator (connect_num* -> connect_flag*):
* 1 when the count is positive, 0 otherwise, and missing when the count is missing.
foreach cnt in connect_num connect_num_dw connect_num_total {
    local flag : subinstr local cnt "connect_num" "connect_flag"
    generate `flag' = (`cnt' > 0) if !missing(`cnt')
}

* Two fixed-effects regressions of connect_num on the lagged, current and lead
* log restriction measures, restricted to year <= 2018. Both absorb agency-year,
* company-year and company-agency effects and cluster on comp_agency.
* Storing the estimates also creates the sample markers _est_m1 and _est_m2,
* which the summary-statistics sections below rely on.
local rhs_vars   L1_log_res log_res F1_log_res
local fe_groups  agency_year comp_year comp_agency

* m1: full sample through 2018.
quietly reghdfe connect_num `rhs_vars' if year <= 2018, ///
    absorb(`fe_groups') vce(cluster comp_agency)
estimates store m1

* m2: same specification, only observations with strictly positive, non-missing restrictions.
quietly reghdfe connect_num `rhs_vars' if year <= 2018 & restrictions > 0 & !missing(restrictions), ///
    absorb(`fe_groups') vce(cluster comp_agency)
estimates store m2









/*  Table 1: Summary Stats  */

* Company-year level connection rates, computed separately for each estimation
* sample (m1, then m2). For every flag, take the maximum within a company-year
* over in-sample rows, then blank out all but one row per company-year so that
* each company-year contributes a single observation to the summary.
foreach model in m1 m2 {
    foreach flag in connect_flag connect_flag_dw connect_flag_total {
        local firm_flag : subinstr local flag "connect_flag" "connect_comp_flag"

        by comp_year, sort: egen `firm_flag' = max(`flag') if _est_`model' == 1
        by comp_year _est_`model', sort: replace `firm_flag' = . if _n != 1
    }

    summarize connect_comp_flag connect_comp_flag_dw connect_comp_flag_total

    * Remove the temporary company-year flags before the next pass.
    drop connect_comp_flag connect_comp_flag_dw connect_comp_flag_total
}

* Observation-level (company-agency-year) connection rates within each sample.
foreach model in m1 m2 {
    summarize connect_flag connect_flag_dw connect_flag_total if _est_`model' == 1
}

/* Remaining summary stats for Panels A and B calculated in 01_RegData_Analysis and 05_1_Contracts_Analysis */








/*  Panel C: By Agency  */

* Mean of connect_flag within each agency over the m1 estimation sample.
* The value is constant within an agency, so the "mean" table reports it as is;
* the "n" table reports how many in-sample observations each agency has.
by agencyid, sort: egen agency_connect = mean(connect_flag) if _est_m1 == 1

foreach stat in mean n {
    table agencyid, statistic(`stat' agency_connect) nformat(%22.0g)
}









/*  Panel D: By Industry  */

* Two-digit sector from the NAICS code. The divisor depends on the code's length:
* six-digit codes are divided by 10000, five-digit codes by 1000 and four-digit
* codes by 100. Codes shorter than four digits are not handled specially.
generate naics2 = floor(naics/10000)
replace naics2 = floor(naics/1000) if floor(naics/100000) == 0
replace naics2 = floor(naics/100) if floor(naics/10000) == 0

* Merge multi-code sectors into their first code: 31-33, 44-45 and 48-49.
replace naics2 = 31 if inlist(naics2, 32, 33)
replace naics2 = 44 if naics2 == 45
replace naics2 = 48 if naics2 == 49

* Company-year connection flag over the m1 sample, kept on a single row per
* company-year so each company-year is counted once in the tables.
by comp_year, sort: egen connect_comp_flag = max(connect_flag) if _est_m1 == 1
by comp_year _est_m1, sort: replace connect_comp_flag = . if _n != 1

* Share of connected company-years and number of company-years per sector.
foreach stat in mean n {
    table naics2, statistic(`stat' connect_comp_flag) nformat(%22.0g)
}

drop connect_comp_flag



/* Data for Figure 1: share of connected company-years by industry (naics2) and year */

* This section reduces the data to one row per industry-year. It is wrapped in
* preserve/restore so the full panel is still available to the sections that
* follow, and it no longer has to be run last.
preserve

* The company-year flag is dropped at the end of Panel D, so rebuild it here:
* maximum of connect_flag within a company-year over the m1 sample, kept on a
* single row per company-year.
capture drop connect_comp_flag
bysort comp_year: egen connect_comp_flag = max(connect_flag) if _est_m1 == 1
bysort comp_year _est_m1: replace connect_comp_flag = . if _n ~= 1

* Keep only the one-per-company-year rows, average the flag within each
* industry-year, and reduce to a single row per industry-year.
keep if !missing(connect_comp_flag)
bysort naics2 year: egen connect_ind_year = mean(connect_comp_flag)
bysort naics2 year: keep if _n == 1
keep naics2 year connect_ind_year

* Show the figure data in the log. To write it to disk instead, add a
* save/export command here, before the restore.
sort naics2 year
list naics2 year connect_ind_year, noobs sepby(naics2)

restore







/*  Data for Figure 2  */

* Company-year connection flag over the m1 sample (maximum of connect_flag
* within a company-year), kept on a single row per company-year so each
* company-year is counted once.
by comp_year, sort: egen connect_comp_flag = max(connect_flag) if _est_m1 == 1
by comp_year _est_m1, sort: replace connect_comp_flag = . if _n != 1

* Share of connected company-years and number of company-years by state.
foreach stat in mean n {
    table state, statistic(`stat' connect_comp_flag) nformat(%22.0g)
}

drop connect_comp_flag